package com.adu.music.parser;

import com.adu.music.bean.Proxy;
import com.adu.music.util.CommonUtils;
import com.adu.music.util.JsoupUtils;
import com.adu.music.util.RegexUtils;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

/**
 * {@link ProxyParser} implementation that scrapes proxy entries from the
 * xicidaili.com listing pages.
 *
 * <p>Each listing page is expected to contain an HTML table selected by
 * {@code #ip_list tr}. Rows that cannot be interpreted are skipped so that a
 * single malformed row does not abort the whole crawl.
 *
 * @author duchuanchuan
 * @date 2017/1/7
 */
public class XicidailiParser implements ProxyParser {
    private static final Logger logger = LoggerFactory.getLogger(XicidailiParser.class);

    /** First and last listing page (inclusive) fetched for every base url. */
    private static final int FIRST_PAGE = 1;
    private static final int LAST_PAGE = 2;

    /** Zero-based indexes of the table cells read from each row. */
    private static final int IP_COLUMN = 1;
    private static final int PORT_COLUMN = 2;
    private static final int TYPE_COLUMN = 5;
    private static final int SPEED_COLUMN = 6;
    private static final int LIVE_TIME_COLUMN = 8;

    /** Proxies whose live time is expressed in hours must have lived at least this long. */
    private static final int MIN_LIVE_HOURS = 12;

    /**
     * Fetches the listing pages below {@code baseUrl} and appends every
     * acceptable proxy to {@code proxies}.
     *
     * <p>An {@link IOException} while fetching one page is logged and the
     * remaining pages are still processed.
     *
     * @param baseUrl base url; the page number is appended to it
     * @param proxies output list that parsed proxies are added to
     */
    private void parseProxyList(String baseUrl, List<Proxy> proxies) {
        for (int page = FIRST_PAGE; page <= LAST_PAGE; page++) {
            String url = baseUrl + page;
            try {
                Document doc = JsoupUtils.connectWithBaseHeaders(url).get();
                if (doc == null) {
                    logger.info("xicidaili parse error or has not proxy url : {}", url);
                    continue;
                }
                Elements rows = doc.select("#ip_list tr");
                if (rows.isEmpty()) {
                    logger.info("xicidaili parse error or has not proxy url : {}", url);
                    continue;
                }
                for (int i = 0; i < rows.size(); i++) {
                    // The header row (contains "国家") has no <td> cells and is expected
                    // on every page, so it is only reported at debug level.
                    Elements tds = rows.get(i).getElementsByTag("td");
                    if (rows.get(i).text().contains("国家") || tds == null || tds.isEmpty()) {
                        logger.debug("xicidaili skip row without td, url : {}", url);
                        continue;
                    }
                    parseRow(tds, url).ifPresent(proxies::add);
                }
            } catch (IOException e) {
                // Throwable goes last without a placeholder so SLF4J logs the stack trace.
                logger.error("xicidaili parser url : {}", url, e);
            }
        }
    }

    /**
     * Converts the cells of one table row into a {@link Proxy}.
     *
     * @param tds the {@code <td>} cells of the row
     * @param url page the row came from; used only for log messages
     * @return the parsed proxy, or an empty {@link Optional} when the row has
     *         too few cells, its live time is too short, or its speed cannot
     *         be read
     */
    private Optional<Proxy> parseRow(Elements tds, String url) {
        // Guard against short rows; LIVE_TIME_COLUMN is the highest index read below.
        if (tds.size() <= LIVE_TIME_COLUMN) {
            logger.warn("xicidaili unexpected column count {} url : {}", tds.size(), url);
            return Optional.empty();
        }
        if (!liveTimeEnough(tds.get(LIVE_TIME_COLUMN).text())) {
            return Optional.empty();
        }
        // The speed is read from the "title" attribute of the first child of the speed cell.
        Elements speedNodes = tds.get(SPEED_COLUMN).children();
        if (speedNodes.isEmpty()) {
            logger.warn("xicidaili row has no speed element url : {}", url);
            return Optional.empty();
        }
        String speedStr = StringUtils.substringBefore(speedNodes.get(0).attr("title"), "秒");
        double speed;
        try {
            speed = Double.parseDouble(speedStr);
        } catch (NumberFormatException e) {
            logger.warn("xicidaili unparsable speed '{}' url : {}", speedStr, url);
            return Optional.empty();
        }
        String ip = tds.get(IP_COLUMN).text();
        int port = CommonUtils.parseInt(tds.get(PORT_COLUMN).text());
        String type = tds.get(TYPE_COLUMN).text();
        return Optional.of(new Proxy(ip, port, type, "GET,POST", speed));
    }

    /**
     * Decides whether a proxy has been alive long enough to be kept.
     *
     * <p>Live times given in minutes ("分钟") are rejected; live times given in
     * hours ("小时") are accepted only from {@value #MIN_LIVE_HOURS} hours on;
     * any other text is accepted.
     *
     * @param liveTimeStr live-time cell text
     * @return {@code true} when the proxy should be kept
     */
    private boolean liveTimeEnough(String liveTimeStr) {
        if (StringUtils.contains(liveTimeStr, "分钟")) {
            return false;
        }
        if (StringUtils.contains(liveTimeStr, "小时")) {
            // Greedy \d+ so that the whole number is captured; a reluctant \d+? followed
            // by .* would capture only the first digit (e.g. "23小时" -> 2).
            // NOTE(review): assumes RegexUtils.getMatchedString returns the given group - confirm.
            int hour = CommonUtils.parseInt(RegexUtils.getMatchedString("(\\d+).*", liveTimeStr, 1));
            return hour >= MIN_LIVE_HOURS;
        }
        return true;
    }

    /**
     * Crawls the four xicidaili listing categories and collects the accepted proxies.
     *
     * @return list of parsed proxies; empty when nothing could be parsed
     */
    @Override
    public List<Proxy> parseProxyList() {
        List<Proxy> proxies = new ArrayList<>();
        // Domestic high-anonymity proxies
        parseProxyList("http://www.xicidaili.com/nn/", proxies);
        // Domestic ordinary proxies
        parseProxyList("http://www.xicidaili.com/nt/", proxies);
        // Domestic HTTPS proxies
        parseProxyList("http://www.xicidaili.com/wn/", proxies);
        // Domestic HTTP proxies
        parseProxyList("http://www.xicidaili.com/wt/", proxies);
        logger.info("find xicidaili proxy {} items", proxies.size());
        return proxies;
    }
}
